# Please e-mail mathatfa@gmail.com about bugs in this script.
# 
# Reads one Excel file per rat and one sheet per group of muscles; each sheet holds neuronal spike timestamps and the times of 16 sweeps.
#
# writes to EMG-RD2019-01-23-data.RData:
#   emg: Spike counts (spikes_0_70ms, spikes_0.2_1s, spikes_0_1s, spikes_1_10s) by Muscle:StimLocat:Operation:Drug:Lateralization
#   emgLat: Neuronal spike count Asymmetry Indices (AI_0_70ms, AI_0.2_1s, AI_0_1s, AI_1_10s) by Muscle:StimLocat:Operation:Drug
#   emgThr: Recording current thresholds (Threshold) by Muscle:StimLocat:Operation:Drug
#   emgThrLat: Recording current threshold Asymmetry Indices (AI_Threshold) by Muscle:StimLocat:Operation:Drug
#   emgHindpaw and emgThrHindpaw are computed but not saved: they do not look interesting...

# Package loading and session-wide settings.
# library() is used instead of require(): require() only warns and returns
# FALSE when a package is missing, so the script would fail later with an
# unrelated error; library() stops immediately with a clear message.
#require("export")
library("tidyverse")
#require("modelr")
library("broom")
library("openxlsx")

theme_set(theme_light())   # default ggplot2 theme for any plots
options(width = 120)       # wider console output
# Plot size constants; presumably half of a 13.33 x 7.5 slide -- confirm.
S_width <- 13.33/2.0
S_height <- 7.5/2
S_signif <- 0.3   # coefficient for power point slides, R-plot=0.15
# All file names below are relative to this data directory.
# setwd("/home/dqc/R/ReverseDesign/HarmonizedData Nabe RD 2019-01-23/")
# setwd("/home/drs/R/Nabe-Rat-CCI-flexion/EMG-M.Zhang/HarmonizedData Nabe RD 2019-01-23/")
setwd("C:/DaniilSarkisyan/R/Nabe-Rat-CCI-flexion/EMG-M.Zhang/HarmonizedData Nabe RD 2019-01-23/")


# Convert one Excel sheet with timestamps of neuronal spikes and the times of
# 16 sweeps into the per-sweep statistics we compare.
#
# Args:
#   wb:        workbook handed to read.xlsx() (read.rat passes the result of
#              loadWorkbook()).
#   sheetName: name of the sheet to read; also stored in the FileNo column.
#
# The sheet is coerced to numeric column by column. The single column with
# exactly 16 non-missing values is taken as the sweep start times; every other
# column is treated as one EMG channel of spike timestamps.
#
# Returns:
#   A row-wise tibble with one row per (sweep, channel): t0 (sweep start),
#   emg (channel column name), nsweep (rank of t0 within the channel),
#   t1 (start of the next sweep, Inf for the last one), spike counts in the
#   windows 0-0.07, 0.2-1, 0-1 and 1-10 after t0, sT_0_10s (";"-separated
#   spike times relative to t0 within 0-10) and FileNo.
#   NULL, with a warning, when the sweep column cannot be identified.
#   NOTE(review): read.rat calls this through dplyr::do(), which expects a
#   data frame from every call -- confirm the NULL path is tolerated there.
read.timestamps <- function(wb, sheetName) {
  ts1 <- read.xlsx(wb, sheet=sheetName, colNames=TRUE) %>% 
    mutate_all(as.character) %>% mutate_all(as.numeric)
  
  # number of non-missing timestamps per column
  spikes.len <- apply(ts1,2,function(x) sum(!is.na(x)))
  sweeps <- which(spikes.len == 16)
  if (length(sweeps)!=1) {
    warning("Can not detect timestamps for 16 sweeps in sheet '", sheetName, "'")
    return(NULL)
  }
  sweeps <- names(spikes.len)[sweeps]
  tol <- 1e-5   # tolerance applied to both ends of every window
  
  # indices of the spikes falling into [from - tol, to + tol]; NAs are dropped
  in.window <- function(spikes, from, to) {
    which(from - tol <= spikes & spikes <= to + tol)
  }
  
  # sort() drops NAs, so all 16 sweep times are used wherever they sit in the
  # column (the column was detected by its count of non-missing values, not by
  # position).
  expand.grid(t0=sort(ts1[[sweeps]]), emg= setdiff(names(spikes.len),sweeps), stringsAsFactors= FALSE) %>% 
    group_by(emg) %>% 
    mutate( nsweep=min_rank(t0), t1=lead(t0, default=Inf) ) %>%
    rowwise %>% mutate(
      spikes_0_70ms = length(in.window(ts1[[emg]], t0,       t0 + 0.07)),
      spikes_0.2_1s = length(in.window(ts1[[emg]], t0 + 0.2, t0 + 1)),
      spikes_0_1s   = length(in.window(ts1[[emg]], t0,       t0 + 1)),
      spikes_1_10s  = length(in.window(ts1[[emg]], t0 + 1,   t0 + 10)),
      # spikes_q50 = quantile(ts1[which(t0+0.2 -tol <= ts1[[emg]] & ts1[[emg]] <= t0+10+tol),emg] - t0, 0.5),
      # spikes_q95 = quantile(ts1[which(t0+0.2 -tol <= ts1[[emg]] & ts1[[emg]] <= t0+10+tol),emg] - t0, 0.95),
      # sT_0_0.2s  = paste(round(ts1[which(t0    -tol <= ts1[[emg]] & ts1[[emg]] <= t0+0.2 -tol),emg]-t0,3), collapse=";"),
      # sT_0.2_10s = paste(round(ts1[which(t0+0.2-tol <= ts1[[emg]] & ts1[[emg]] <= t0+10  +tol),emg]-t0,3), collapse=";"),
      # Same upper bound (+tol) as spikes_1_10s, so every counted spike is
      # also listed here.
      sT_0_10s  = paste(ts1[[emg]][in.window(ts1[[emg]], t0, t0 + 10)] - t0, collapse=";")
    ) %>% mutate(FileNo=sheetName)
}

# Read every sheet of one rat's "<rat>-timestamps.xlsx" workbook.
#
# Args:
#   RatFile: path to the workbook; the rat name is the file's base name with
#            the "-timestamps.xlsx" part removed.
#
# Returns:
#   The read.timestamps() results of all sheets bound together, with the rat
#   name added as column RatNo.
read.rat <- function(RatFile) {
  workbook <- loadWorkbook(RatFile)
  rat_id <- sub("-timestamps.xlsx", "", basename(RatFile), fixed = TRUE)
  # print(paste("reading timestamps for rat =", rat_id, "from sheets",paste(names(workbook),collapse= " ") ))
  sheet_rows <- rowwise(tibble(FileNo = names(workbook)))
  per_sheet <- do(sheet_rows, read.timestamps(workbook, .$FileNo))
  mutate(per_sheet, RatNo = rat_id)
}

## test function
# tmp <- read.rat("Sham31-timestamps.xlsx")
# write_delim(tmp, "tmp.csv", delim="\t")

# read timestamps -------------------
# list.files() interprets `pattern` as a regular expression, not a glob, so
# the dot is escaped and the match is anchored at the end of the file name;
# this keeps out names such as "x-timestamps.xlsx.bak".
emg16 <- tibble( RatNo= list.files(path=".", pattern="-timestamps\\.xlsx$", full.names = TRUE) ) %>%
  rowwise %>% do(read.rat(.$RatNo))

# Split the channel column name into hindpaw and muscle, and express the
# hindpaw as the side that was measured (Right/Left).
emg <- emg16 %>% ungroup %>% separate(emg, c("Hindpaw", "Muscle")) %>%
  mutate( 
    Hindpaw = replace(Hindpaw, which(Hindpaw=="Contr"),"R"),  #TBI57 is Left TBI
    Hindpaw = replace(Hindpaw, which(Hindpaw=="Ipsi"),"L"),   #TBI57 is Left TBI
    Hindpaw = factor(Hindpaw, c("R","L"), c("Right","Left")) # Right, then Left
  ) %>% group_by(RatNo,FileNo,Muscle,Hindpaw)

# write.xlsx(select(emg, RatNo,FileNo,Muscle,Hindpaw,spikes_0_70ms, spikes_0.2_1s, spikes_1_10s) %>% summarise_all(sum),
#            file="EMG Summarize 16 sweeps.xlsx", colNames=TRUE)

# emg %>% ungroup %>% mutate_if(is.character, as.factor) %>% select(-sT_0_10s) %>% summary(maxsum = 100)
# save(emg, file="EMG-RD2019-01-23-data.RData")
# load(file="EMG-RD2019-01-23-data.RData")
rm(emg16)   # the raw per-sweep table is no longer needed


# annotate autocounted data with Mengliang's data, QC-ed by sum of 16 sweeps  ------------------
# Left join: every annotated row is kept, with the autocounted per-sweep rows
# attached where RatNo/Hindpaw/Muscle/FileNo match.
emg.annot <- read.xlsx("EMG Nabe Data DS2019-01-29.xlsx", sheet="Data-RD-QC")
emg <- merge(emg.annot, emg, by=c("RatNo","Hindpaw","Muscle","FileNo"),all.x=TRUE, all.y=FALSE,sort=FALSE)

# Totals over the sweeps of each recording (capitalised "Spikes_" columns).
emg <- emg %>% group_by(RatNo,Drug,Muscle,Hindpaw,FileNo) %>% 
  mutate( Spikes_0_70ms = sum(spikes_0_70ms),
          Spikes_0.2_1s  = sum(spikes_0.2_1s),
          Spikes_0_1s    = sum(spikes_0_1s) )

# Despite its name, `err` is TRUE when the autocounted totals agree with the
# annotated ones (difference below 4 in every window) and FALSE otherwise.
emg <- emg %>% ungroup %>% 
  mutate(err= abs(Spikes_0_70ms - Spikes.0.70ms ) < 4 & 
              abs(Spikes_0.2_1s  - Spikes.0.2.1s ) < 4 &
              abs(Spikes_0_1s    - Spikes.0.1s   ) < 4 )

# `err` is NA when there is nothing to compare (no autocounted rows matched the
# annotation, or an annotated count is missing). Such rows are not flagged as
# discrepancies below and are dropped by the later err==TRUE filter, so report
# them instead of losing them silently.
if (anyNA(emg$err)) {
  warning(sum(is.na(emg$err)),
          " annotated row(s) could not be checked against autocounted spikes (err is NA)")
}

if (any(!emg$err, na.rm = TRUE)) {
  write.xlsx(emg,file="EMG 16 sweeps full annotation.xlsx", colNames=TRUE)
  stop("Discrepancies detected, see column 'err' in a log file EMG 16 sweeps...xlsx")
}
xtabs(~Operation+StimLocat+Drug+Muscle, data=emg)

# RatNo/FileNo --> SC for groups of Muscle:StimLocat:Later inside intervals 0-70ms, 0.2-1s, 0-1s ----
# Keep QC-passed rows of the listed muscle/stimulation-site pairs, add Later
# ("Ipsi" when the measured hindpaw is on the operated side, else "Contra")
# and reduce the table to the columns used below.
emg <- emg %>%
  filter(
    err == TRUE,
    paste0(Muscle, ".", StimLocat) %in% c(
      "EDL.D4", "EDL.D5",
      "Int.D2", "Int.D3", "Int.D4", "Int.D5",
      "PL.D5", "PL2.D5",
      "ST.Heel"
    )
  ) %>%
  # filter(Muscle %in% c("EDL", "Int", "PL", "PL2", "ST")) %>%
  mutate(
    Later = factor(
      ifelse(OperationSide == Hindpaw, "Ipsi", "Contra"),
      levels = c("Contra", "Ipsi")
    )
  ) %>%
  select(
    RatNo, FileNo,
    Operation, Drug,
    OperationSide, Hindpaw, Later,
    Muscle, StimLocat,
    nsweep, RecCurrent,
    spikes_0_70ms, spikes_0.2_1s, spikes_0_1s, spikes_1_10s,
    sT_0_10s,
    Threshold, FileNo.Threshold
  )
xtabs(~Operation+StimLocat+Drug+Muscle, data=emg)

# SC by Later --> AI = log2((1+Contra)/(1+Ipsi)); the +1 on both sides avoids log(0) and division by zero ----
# Long format (one row per window), mean count per side, then one column per
# side/window pair so that the two sides can be compared within a row.
emgLat <- emg %>%
  gather(key = interv, value = nspikes, starts_with("spikes_")) %>%
  group_by(
    RatNo, Operation, Drug, OperationSide, Later,
    Muscle, StimLocat, nsweep, RecCurrent, interv
  ) %>%
  # keeps only the grouping variables and nspikes
  summarise(nspikes = mean(nspikes, na.rm = TRUE)) %>%
  unite(col = LatInt, Later, interv) %>%
  spread(key = LatInt, value = nspikes) %>%
  transmute(
    # AI_q50 = log2(Contra_spikes_q50 / Ipsi_spikes_q50),
    # AI_q95 = log2(Contra_spikes_q95 / Ipsi_spikes_q95),
    AI_0_70ms = log2((1 + Contra_spikes_0_70ms) / (1 + Ipsi_spikes_0_70ms)),
    AI_0.2_1s = log2((1 + Contra_spikes_0.2_1s) / (1 + Ipsi_spikes_0.2_1s)),
    AI_0_1s   = log2((1 + Contra_spikes_0_1s)   / (1 + Ipsi_spikes_0_1s)),
    AI_1_10s  = log2((1 + Contra_spikes_1_10s)  / (1 + Ipsi_spikes_1_10s))
  ) %>%
  ungroup() %>%
  # PL2 is reported together with PL
  mutate(Muscle = replace(Muscle, Muscle %in% "PL2", "PL"))
# emgLat[!complete.cases(emgLat), ]

# SC by Hindpaw, i.e. MeasurementSide --> AI = log2((1+Left)/(1+Right)) ----
# Same reshaping as for emgLat, but the two sides are the measured hindpaws
# (Left/Right) instead of Contra/Ipsi. Each output column is named after the
# window it is computed from: previously AI_0.2_1s was assigned twice (the
# second time from the 0-1s counts) and AI_0_1s held the 1-10s values.
emgHindpaw <- emg %>% 
  gather(interv, nspikes, starts_with("spikes_")) %>%
  group_by(RatNo, Operation,Drug, OperationSide,Hindpaw, Muscle,StimLocat, nsweep,RecCurrent, interv) %>%
  summarize(nspikes = mean(nspikes, na.rm=TRUE)) %>%   # remove all, but grouping variables + nspikes
  unite(HindInt, Hindpaw, interv) %>% spread(HindInt, nspikes) %>%
  transmute( 
    # AI_q50 = log2(Left_spikes_q50 / Right_spikes_q50),
    # AI_q95 = log2(Left_spikes_q95 / Right_spikes_q95),
    AI_0_70ms = log2((1+Left_spikes_0_70ms) / (1+Right_spikes_0_70ms)),
    AI_0.2_1s = log2((1+Left_spikes_0.2_1s) / (1+Right_spikes_0.2_1s)),
    AI_0_1s   = log2((1+Left_spikes_0_1s)   / (1+Right_spikes_0_1s)),
    AI_1_10s  = log2((1+Left_spikes_1_10s)  / (1+Right_spikes_1_10s)) ) %>% 
  ungroup %>% 
  mutate( Muscle = replace(Muscle, which(Muscle=="PL2"),"PL") )
# emgHindpaw[!complete.cases(emgHindpaw), ]

# SC by OperationSide --> AI not possible, since Left and Right is never in one rat

# Thresholds: mean positive Threshold per recording (RatNo/FileNo), side,
# muscle and stimulation site. No asymmetry index is computed here.
emgThr <- emg %>%
  filter(Threshold > 0) %>%
  group_by(
    RatNo, FileNo, Operation, Drug,
    OperationSide, Hindpaw, Later,
    Muscle, StimLocat
  ) %>%
  summarise(Threshold = mean(Threshold, na.rm = TRUE)) # , nnn=sd(Threshold) )
# emgThr %>% filter( nnn > 0 )
# Quick look at the distribution of thresholds and muscles
table(emgThr[["Threshold"]])
table(emgThr[["Muscle"]])

# Thresholds by Later, AI = log2(Contra/Ipsi) ----
# NOTE(review): unlike emgThr, no Threshold > 0 filter is applied before
# averaging -- confirm this is intended.
emgThrLat <- emg %>%
  group_by(
    RatNo, Operation, Drug, OperationSide, Later,
    Muscle, StimLocat
  ) %>%
  # keeps only the grouping variables and the mean Threshold
  summarise(Threshold = mean(Threshold, na.rm = TRUE)) %>%
  spread(key = Later, value = Threshold) %>%
  mutate(AI_Threshold = log2(Contra / Ipsi)) %>%
  select(-Contra, -Ipsi) %>%
  ungroup() %>%
  # PL2 is reported together with PL
  mutate(Muscle = replace(Muscle, Muscle %in% "PL2", "PL")) %>%
  # drop rows where any column, including the index, is missing
  filter(complete.cases(.))

# Thresholds by Measurement Side, AI = log2(Left/Right) ----
emgThrHindpaw <- emg %>%
  group_by(
    RatNo, Operation, Drug, OperationSide, Hindpaw,
    Muscle, StimLocat
  ) %>%
  # keeps only the grouping variables and the mean Threshold
  summarise(Threshold = mean(Threshold, na.rm = TRUE)) %>%
  spread(key = Hindpaw, value = Threshold) %>%
  mutate(AI_Threshold = log2(Left / Right)) %>%
  select(-Left, -Right) %>%
  ungroup() %>%
  # PL2 is reported together with PL
  mutate(Muscle = replace(Muscle, Muscle %in% "PL2", "PL")) %>%
  # drop rows where any column, including the index, is missing
  filter(complete.cases(.))

# SC = Neuron Spike Counts, DS: Should we use NSC+1 to fit negative binomial better?
# Report PL2 together with PL in the two tables that still carry the raw
# muscle label, then store the results.
emg <- emg %>%
  ungroup() %>%
  mutate(Muscle = replace(Muscle, Muscle %in% "PL2", "PL"))
emgThr <- emgThr %>%
  ungroup() %>%
  mutate(Muscle = replace(Muscle, Muscle %in% "PL2", "PL"))

# Only StimLocat balanced Sham vs TBI
save(emg, emgLat, emgThr, emgThrLat, file = "EMG-RD2019-01-23-data.RData")
